************************************************************
*****     Heterogeneous Treatment Effect (CATE)        *****
*****          Analysis with Causal Forests            *****
************************************************************


/* CONTENTS:
1.  Data preparation for analysis in R
	1.1 Create CF features 
	1.2 Create samples with non-repeated observations
	1.3 Create CF input data (combine samples & features)

(run R code)

2. Plot and tables
	2.1 Analysis by median
*/


* Start from a clean session before anything is loaded
clear all

* Graph scheme applied to every figure drawn by this do-file
* NOTE(review): plotplain is not a built-in Stata scheme - presumably installed by the user; confirm
set scheme plotplain

** Globals
* delta: number of years subtracted from a control child's age at own admission
* to obtain the child's age in the matched treatment group's shock year
global delta 4
* Output folder for the heterogeneity (HTE) results; $output must already be defined by the caller
global output_CATE "$output\HTE"


*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
*-*-*-* 1. DATA PREPARATION *-*-*-*-*-*
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

**************************
* 1.1 CREATE CF FEATURES *
**************************

****************************************************************************
* Matched Sample Data for CF:
* Create data with outcomes:
*     - change in average income (between post- and pre-shock)
*     - working probability after the shock
* and features including: 
*     - mother's age and child's age in the year of the treatment group's shock
****************************************************************************

{
* Load the diagnosis dates and attach the mothers' and fathers' income files.
* keep(3) retains only observations matched in each merge.
use "$processed_data\date_diagnosis.dta", clear 
merge 1:m shnro_mother using "$processed_data\mothers_income_diagnosis4", ///
	keep(3) nogen
merge 1:1 shnro_mother treat using "$processed_data\fathers_income_diagnosis4", ///
	keep(3) nogen

* Restrict the data to the variables needed for the causal-forest input
global rel_vars shnro_mother mother_age finnish married single mother_b_year age_mother_inp ///
			    male child_b_date child_b_year first_birth ///
			    yearinp ageinphospitalnew treat ageinptreat year_treat ///
			    income_* working_year* mentalhealthyear* ///
			    shnro_father father_income_* ///
			    educ0 age_father_inp father_educ0 ///
			    ICD10 nchild_year0 
keep $rel_vars
* This variable would otherwise be picked up by the mentalhealth stub in the reshape below
drop mentalhealthyearminus2_any

* Wide -> long: one row per (mother, treatment status) and event-year suffix.
* A temporary numeric id built from (shnro_mother, treat) serves as the reshape key.
egen new_shnromother = group(shnro_mother treat)
reshape long income_ working_ father_income_ mentalhealth, ///
	i(new_shnromother) j(year_event) string
drop new_shnromother
* Strip the trailing underscore left over from the stubs
rename (*income_) *income
rename working_ working

* Numeric event time relative to the hospitalization year (hospitalization year == 0),
* decoded from the suffixes "yearminus1".."yearminus5" and "year0".."year3"
gen time_event = .
forvalues lag = 1/5 {
	replace time_event = -`lag' if year_event == "yearminus`lag'"
}
forvalues lead = 0/3 {
	replace time_event = `lead' if year_event == "year`lead'"
}

* time_event is either -5..3 or missing at this point; missing counts as larger than
* any number in Stata, so this removes exactly the rows whose suffix was not decoded
drop if time_event > 5

* Post-shock indicator (event year 0 onwards)
gen post = (time_event >= 0)

* Parents' ages in year 0 of the T group's shock, and the within-household earnings
* gap at event time -2, each copied to every row of the household's panel
* (for later duplicate dropping and to allow controlling for parental age in CF).
*
* The values are first computed on the single row where they are defined and then
* spread within (mother, father, year_treat). Both IDs are used because a given
* mother can be included multiple times with a different father but the same
* treatment year. The spreading step sorts each group on the target variable itself
* (missing values sort last), so element [1] is the defined value whenever one
* exists. This replaces fixed row positions ([6] for event time 0, [4] for event
* time -2), which were only correct for a perfectly balanced -5..3 panel.

* calendar year of each row (year_treat + event time)
gen year_calendar = year_treat + time_event
* father's year of birth
gen father_b_year = yearinp - age_father_inp
* age of each parent in year 0 of the T group's shock (defined on the event-time-0 row)
gen age_mother_y0 = year_calendar - mother_b_year if time_event == 0
gen age_father_y0 = year_calendar - father_b_year if time_event == 0

* add these ages to all rows
bys shnro_mother shnro_father year_treat (age_mother_y0): ///
	replace age_mother_y0 = age_mother_y0[1] if age_mother_y0 == .
label var age_mother_y0 "Mother's age when (corresponding) T group has the shock"
bys shnro_father shnro_mother year_treat (age_father_y0): ///
	replace age_father_y0 = age_father_y0[1] if age_father_y0 == .
label var age_father_y0 "Father's age when (corresponding) T group has the shock"

* Within household gender gap in labour earnings (at period -2), added to all rows
gen earnings_gap_hh = father_income - income if time_event == -2
bys shnro_father shnro_mother year_treat (earnings_gap_hh): ///
	replace earnings_gap_hh = earnings_gap_hh[1] if earnings_gap_hh == .
label var earnings_gap_hh "Within household gender gap in total earnings (father-mother) at period -2"

* leave the panel ordered by event time within household
sort shnro_father shnro_mother year_treat time_event

* Generate pre-shock and post-shock working dummies
* pre_working:  1 if all pre-shock years are recorded as working, 0 otherwise
* post_working: 1 if all post-shock years are recorded as working, 0 otherwise
* Missing yearly records are treated as not working.
replace working = 0 if working == .
rename working working_year
* share of years worked within each household x period (pre/post)
bys shnro_mother shnro_father year_treat post: egen work_temp = mean(working_year)
gen working = (work_temp == 1)

* Add the pre and post values to all rows for duplicate dropping.
* Each dummy is defined on the rows of its own period and then spread within the
* household by sorting on the dummy itself (missing sorts last, so [1] is a defined
* value whenever the period has rows). This avoids relying on fixed row positions
* (row 1 = pre, row 6 = post), which only hold for a balanced -5..3 panel.
gen pre_working = working if post == 0
gen post_working = working if post == 1
bys shnro_mother shnro_father year_treat (pre_working): ///
	replace pre_working = pre_working[1] if pre_working == .
bys shnro_mother shnro_father year_treat (post_working): ///
	replace post_working = post_working[1] if post_working == .
label var pre_working "Working in all the years before the shock"
label var post_working "Working in all the years after the shock"
* working_year* already covers working_year, so it is not listed twice
drop work_temp working working_year*

* leave the panel ordered by event time within household
sort shnro_mother shnro_father year_treat time_event

* Generate pre-shock and post-shock mental health dummies
* 1 if any mental health visit recorded in any of the pre-shock/post-shock years, respectively
* 0 otherwise (if absolutely no mh visit in any of the pre-/post-shock years)
* impute missing with 0 if needed (0 real changes made)
replace mentalhealth = 0 if mentalhealth == .
rename mentalhealth mentalhealth_year
* share of years with a visit within each household x period (pre/post)
bys shnro_mother shnro_father year_treat post: egen mentalhealth_temp = mean(mentalhealth_year)
gen mentalhealth_any = (mentalhealth_temp > 0)

* Add the pre and post mental health dummies to all rows for duplicate dropping.
* Each dummy is defined on the rows of its own period and then spread within the
* household by sorting on the dummy itself (missing sorts last, so [1] is a defined
* value whenever the period has rows). This avoids relying on fixed row positions
* (row 1 = pre, row 6 = post), which only hold for a balanced -5..3 panel.
gen pre_mentalhealth = mentalhealth_any if post == 0
gen post_mentalhealth = mentalhealth_any if post == 1
bys shnro_mother shnro_father year_treat (pre_mentalhealth): ///
	replace pre_mentalhealth = pre_mentalhealth[1] if pre_mentalhealth == .
bys shnro_mother shnro_father year_treat (post_mentalhealth): ///
	replace post_mentalhealth = post_mentalhealth[1] if post_mentalhealth == .
label var pre_mentalhealth "Mental health visit in any of the years before the shock"
label var post_mentalhealth "Mental health visit in any of the years after the shock"
drop mentalhealth_temp mentalhealth_year mentalhealth_any

* leave the panel ordered by event time within household
sort shnro_mother shnro_father year_treat time_event

* Remove helper variables and everything that still varies across a household's rows
drop year_calendar mother_b_year father_b_year time_event year_event

* Child's age in the year when the T group's shock occurs (year 0),
* to allow for controlling for child age in CF:
*   T group (treat == 1): the age at first hospital admission
*   C group (treat == 0): the age at own admission minus $delta years
* The indicator (treat == 0) switches the $delta adjustment on for controls only;
* rows with any other value of treat stay missing.
gen age_child_y0 = ageinphospitalnew - $delta * (treat == 0) if treat == 0 | treat == 1
label var age_child_y0 "Child's age when (corresponding) T group has the shock"

* Only-child vs siblings indicator: 1 if more than one child, 0 otherwise
* (nchild_year0 is sometimes coded as 0; this is treated as an only child too).
* Left missing when nchild_year0 is missing.
gen siblings = (nchild_year0 > 1) if nchild_year0 != .

* Average income in the pre-shock and post-shock periods, for mothers and fathers.
* year_treat distinguishes between multiple T and C obs from the same parent.
bys shnro_mother year_treat post: egen avg_income = mean(income)
bys shnro_father year_treat post: egen avg_father_income = mean(father_income)

* Once the yearly income columns are gone, rows are identical within
* household x period, so dropping duplicates leaves one row per household and period
drop income father_income
duplicates drop

* One row per household: period 0 (pre) and period 1 (post) become separate columns
reshape wide avg_income avg_father_income, ///
	i(shnro_mother shnro_father year_treat) j(post)

* Replace the 0/1 suffix created by reshape with a pre_/post_ prefix
rename (avg_income0 avg_father_income0) (pre_avg_income pre_avg_father_income)
rename (avg_income1 avg_father_income1) (post_avg_income post_avg_father_income)

* Change in mothers' income between the pre- and post-shock averages, in absolute terms (Euros)
gen change_income_postpre = post_avg_income - pre_avg_income
label var change_income_postpre "Change btw post- and pre-shock avg_income in absolute terms (mothers)"
	

*** Background characteristics used as CF features

* Quartiles of pre-shock average income, separately for mothers and fathers
label define qrt_inc_lab 1 "Income Q1" 2 "Income Q2" 3 "Income Q3" 4 "Income Q4"
foreach incvar in pre_avg_income pre_avg_father_income {
	xtile `incvar'_qrt = `incvar', nq(4)
	label values `incvar'_qrt qrt_inc_lab
	label var `incvar'_qrt "`incvar' quartiles"
}

* Parental age groups: below 30, 5-year bands up to 60, then 60 and above (cut-off at 100)
foreach parent in mother father {
	egen age_`parent'_y0_cat = cut(age_`parent'_y0), at(0, 30, 35, 40, 45, 50, 55, 60, 100) label
}

* Child age groups: 3-year bands between 7 and 18 years old.
* egen's label option codes the bands 0..3; the custom value label below is attached
* to those codes in place of the automatically generated one.
egen age_child_y0_cat = cut(age_child_y0), at(7, 10, 13, 16, 19) label
label define child_age_lab 0 "7, 8, 9 yo" 1 "10, 11, 12 yo" 2 "13, 14, 15 yo" 3 "16, 17, 18 yo"
label values age_child_y0_cat child_age_lab

* Quartiles of the within-household gender gap in labour earnings
xtile earnings_gap_hh_qrt = earnings_gap_hh, nq(4)
label define qrt_earn_gap 1 "Earnings gap Q1" 2 "Earnings gap Q2" 3 "Earnings gap Q3" 4 "Earnings gap Q4"
label values earnings_gap_hh_qrt qrt_earn_gap
label var earnings_gap_hh_qrt "earnings_gap_hh quartiles"

	
* Generate high-level diagnosis subgroups from the first two characters of the ICD-10 code.
* Group numbers follow the ICD-10 chapter order. Two chapters share a first letter
* with their neighbour and are split on the second character:
*   D0-D4 (D00-D48) belong to Neoplasms, the remaining D codes to Blood
*   H0-H5 (H00-H59) belong to Eye,       the remaining H codes to Ear
* The D0 and H0 prefixes are listed explicitly; without them D00-D09 (in situ
* neoplasms) would be classified as Blood and H00-H06 (eyelid/orbit) as Ear.
* Codes starting with letters not handled below keep a missing icd10_group.
g grouponly = substr(ICD10,1,2)
g icd_letter = substr(grouponly,1,1)

g icd10_group = 1 if icd_letter == "A" | icd_letter == "B"
replace icd10_group = 2 if icd_letter == "C"
replace icd10_group = 2 if grouponly == "D0" | grouponly == "D1" | ///
	grouponly == "D2" | grouponly == "D3" | grouponly == "D4"
replace icd10_group = 3 if icd_letter == "D" & icd10_group != 2
replace icd10_group = 4 if icd_letter == "E"
replace icd10_group = 5 if icd_letter == "F"
replace icd10_group = 6 if icd_letter == "G"
replace icd10_group = 7 if grouponly == "H0" | grouponly == "H1" | ///
	grouponly == "H2" | grouponly == "H3" | grouponly == "H4" | grouponly == "H5"
replace icd10_group = 8 if icd_letter == "H" & icd10_group != 7
replace icd10_group = 9 if icd_letter == "I"
replace icd10_group = 10 if icd_letter == "J"
replace icd10_group = 11 if icd_letter == "K"
replace icd10_group = 12 if icd_letter == "L"
replace icd10_group = 13 if icd_letter == "M"
replace icd10_group = 14 if icd_letter == "N"
replace icd10_group = 15 if icd_letter == "O"
replace icd10_group = 17 if icd_letter == "Q"
replace icd10_group = 18 if icd_letter == "R"
replace icd10_group = 19 if icd_letter == "S" | icd_letter == "T"
replace icd10_group = 21 if icd_letter == "Z"
* Note: group 20 only one observation
label define icd10 1 "Infections" 2 "Neoplasms" 3 "Blood" ///
4 "Endocrine" 5 "Mental" 6 "Nervous" 7 "Eye" 8 "Ear" 9 "Circulatory" ///
10 "Respiratory" 11 "Digestive" 12 "Skin" 13 "Muscoloskeletal" ///
14 "Genitourinary" 17 "Congenital" ///
18 "Symptoms" 19 "Injury" 21 "Factors", replace 
label val icd10_group icd10
* Check the distribution, including codes that could not be assigned
tab icd10_group,m 
drop grouponly icd_letter
	   

* save data, to be imported to R notebook
save "$processed_data\cf_input_mother_father.dta", replace
* cf_input_mother_father.dta dimensions reported for the original run: 71,628 obs 42 vars

}



*****************************************************
* 1.2 CREATE SAMPLES WITH NON-REPEATED OBSERVATIONS *
*****************************************************

*******************************************************************
* Matched Sample Data DiD Hospitalizations for CF:

* 1) Create sample where: 
*       no observation belongs to both T and C groups
                                                                 
* 2) Create sample where:
*       no observation belongs to both T and C groups & 
*       the number of T and C observations match (can be paired)
                            				
* Note: "duplication" in T and C groups always considered 
*        based on mother's ID
*******************************************************************
{
* Import and merge the data
use "$processed_data\date_diagnosis.dta", clear 
merge 1:m shnro_mother using ///
"$processed_data\mothers_income_diagnosis4", keep(3) nogen
merge 1:1 shnro_mother treat using ///
"$processed_data\fathers_income_diagnosis4", keep(3) nogen 

** Implement sample conditions
keep if sample_hospital == 1


*----------------*
*  Subsample 1)  *
*----------------*
** Eliminate duplicate use of observations (as both treatment and control) based on mother's ID
*
* The 1:1 merge above requires (shnro_mother, treat) to be unique, so a repeated
* mother contributes one row per treatment status. For each repeated mother one
* row is kept at random.
*
* Implementation notes:
*  - Rows are put in a deterministic order (mother, then treat) before any random
*    number is drawn, so the selection is reproducible from the seed. Pairing a row
*    with "the previous row" (rand[_n-1]) is avoided because nothing guarantees that
*    the previous row belongs to the same mother.
*  - The tie-break is written as an if-qualifier. An `if (exp) { ... }` command
*    evaluates exp on the first observation only and would then drop every treated row.
*  - Because the draws are assigned differently from the earlier implementation, the
*    selected rows differ from previously saved samples; the number of rows (one per
*    mother) does not.

* tag mothers that appear more than once (both as T and C)
bys shnro_mother (treat): gen double_occ = (_N > 1) // 26,344 observations reported as appearing more than once
		
* randomly drop one of these observations
set seed 123456
* first row of a repeated mother draws u, the other row(s) get 1 - u
by shnro_mother: gen double rand = runiform() if double_occ == 1 & _n == 1
by shnro_mother: replace rand = 1 - rand[1] if double_occ == 1 & _n > 1
* drop the row whose value is below 0.5 (rows of non-repeated mothers have rand == . and are kept)
drop if rand < 0.5
* resolve the unlikely case where no row of a repeated mother fell below 0.5 (exact tie):
* drop the treatment row, because there are more treated observations
bys shnro_mother: drop if _N > 1 & treat == 1
drop double_occ rand

* save data
save "$processed_data\cf_no_repeated_obs_sample.dta", replace // 35,102 obs reported (all unique shnro_mother)


*----------------*
*  Subsample 2)  *
*----------------*
** Keep only 1 control observation for each treated observation, and vice versa
** (continues from the data currently in memory)

* identify groups: same child birth year and same mother birth year
egen groupid = group(child_b_year mother_b_year)
* keep the same number of T and C obs within each group
set seed 123456
* Order observations randomly within group and treatment type and number them;
* a number that occurs for both a T and a C observation forms a pair, the rest
* (the surplus of the larger side) is dropped.
* The random order is requested inside the by-prefix: a separate `sort rand_order`
* followed by `bys groupid treat:` would re-sort the data, and Stata's sort does not
* preserve the previous order among ties, so the numbering would not be reproducible
* from the seed. The draw is stored as double to make ties in rand_order negligible.
* The number of retained observations is unaffected by which rows are picked.
gen double rand_order = runiform()
bys groupid treat (rand_order): gen within_group_id = (_n)
bys groupid within_group_id: keep if _N == 2 

drop rand_order within_group_id

* save data
save "$processed_data\cf_no_repeated_obs_pairs_sample.dta", replace // 28,414 obs reported (all unique shnro_mother)

			
*********************************************************
* 1.3 CREATE CF INPUT DATA (COMBINE SAMPLES & FEATURES) *
*********************************************************

*************************************************************************
* Merge change in avg income, and age variables into subsamples 1) and 2)
* (To be used as CF's main outcome and features) 
*************************************************************************

* Outcomes, treatment indicator and features handed over to the causal forest
global vars_keep change_income_postpre post_working post_mentalhealth treat ///
				 pre_avg_income_qrt pre_avg_father_income_qrt ///
				 age_child_y0_cat age_mother_y0_cat age_father_y0_cat ///
				 educ0 father_educ0 ///
				 finnish married single male ///
				 earnings_gap_hh_qrt icd10_group siblings ///
				 pre_mentalhealth

*** SPECIFY WHICH DATASETS TO MERGE
* Which CF pre-processed input data to use (the same feature file for both samples)
global cf_preproc "mother_father"

* Which no-duplicate (in T & C groups) sample to use: both are processed in turn.
* For each one, the sample is loaded, the CF features are attached by mother and
* treatment year (matched rows only), and only the CF variables are kept.
* Rows reported for the original run: 35,102 (no_repeated_obs) and
* 28,414 (no_repeated_obs_pairs), all unique shnro_mother.
foreach sample_name in no_repeated_obs no_repeated_obs_pairs {
	global norep_sample "`sample_name'"

	use "$processed_data\cf_${norep_sample}_sample.dta", clear 
	merge 1:1 shnro_mother year_treat using "$processed_data\cf_input_$cf_preproc.dta", keep(3) nogen
	keep $vars_keep
	save "$processed_data\cf_input_${norep_sample}.dta", replace
}
}

* Stop here: the R code has to be run before the plots and tables below can be produced
exit 


*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*
*<>*<>*<>*<>*<>*<>*<>*<>*<>            RUN R CODE HERE            *<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*
*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*<>*


*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
*-*-*-*-*      2. PLOTS & TABLES    *-*-*-*-*-*
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

clear all

{
* Outcomes whose predicted CATEs are imported from R (one file per outcome)
global all_outcomes "change_income_postpre post_working post_mentalhealth"

* Set globals 
* Tags identifying which R output files to load
global feature_num "numfeat_14"
global selected_data "no_repeated_obs"
* OLS estimates kept for an optional reference line on the histograms
* (the histogram drawn below does not use them)
global ols_income_postpre = -652.302
global ols_post_working = -0.014
* Save everything to the same output folder (option to save elsewhere)
global output_CATE_selected "$output_CATE"

* Create the CATE histogram and the below/above-median table for every outcome
foreach j in 1 2 3 {
	
	global selected_outcome `: word `j' of $all_outcomes'

	* Import predicted CATE from R
	use "$processed_data\cf_output_${selected_data}_${feature_num}_${selected_outcome}.dta", clear 

	* Outcome-specific title and figure/table numbering
	label var pred_eff "Conditional Average Treatment Effect"
	local hist_subt ""
	if "$selected_outcome" == "change_income_postpre" {
		local outcome_label "Earnings"
		local nfig a
		local ntab 23
	}
	if "$selected_outcome" == "post_working" {
		local outcome_label "Employment"
		local nfig b
		local ntab 24
	}
	if "$selected_outcome" == "post_mentalhealth" {
		local outcome_label "Mental health"
		local nfig c
		local ntab 25
	}
	
	* Histogram of the predicted CATE with a vertical line at its mean
	* (main histogram, without range, without OLS coeff)
	qui: sum pred_eff
	hist pred_eff, percent xline(`r(mean)') ///
	title(`outcome_label') ///
	subtitle(`hist_subt')
	graph export "$output_CATE_selected\fig A15`nfig'_fi.pdf" , replace


	* Pre-processing for analysis by quartile and median
	{	
	* CATE groups are ordered from least affected to most affected:
	*   for (mostly) negative treatment effects: lowest group = close to 0, highest = very negative
	*   for (mostly) positive treatment effects: lowest group = close to 0, highest = very positive
	* "Mostly" is decided by the sign of the mean predicted effect. When the mean is
	* not positive, the ranking is done on the sign-flipped effect (pred_eff_temp).
	qui: sum pred_eff
	local negative_te = !(r(mean) > 0)
	local rank_var pred_eff
	if `negative_te' {
		gen pred_eff_temp = (-1) * pred_eff
		local rank_var pred_eff_temp
	}

	* CATE quartiles from least affected (Q1) to most affected (Q4)
	xtile pred_eff_qrt = `rank_var', nq(4)
	label define qrt_lab 1 "Q1 (smallest TE)" 2 "Q2" 3 "Q3" 4 "Q4 (largest TE)"
	label values pred_eff_qrt qrt_lab

	* CATE below and above median groups, same logic with signs as for quartiles
	xtile pred_eff_median = `rank_var', nq(2)
	if `negative_te' {
		label define median_lab 1 "Below median (small TE)" 2 "Above median (large TE)"
	}
	else {
		label define median_lab 1 "Below median" 2 "Above median"
	}
	label values pred_eff_median median_lab

	* Dummify categorical features
	global vars_to_dummify age_mother_y0_cat age_father_y0_cat age_child_y0_cat ///
						   educ0 father_educ0  pre_avg_income_qrt pre_avg_father_income_qrt ///
						   earnings_gap_hh_qrt icd10_group
	foreach var of varlist $vars_to_dummify {
		tab `var', gen(`var'_bin_)
	}

	* Collect characteristics of interest
	* Dummies - 14 features
	global vars_bin male siblings age_child_y0_cat_bin* ///
	age_mother_y0_cat_bin* educ0_bin* finnish single married pre_avg_income_qrt_bin* ///
	age_father_y0_cat_bin* father_educ0_bin* pre_avg_father_income_qrt_bin* ///
	earnings_gap_hh_qrt_bin* ///
	icd10_group_bin*

	* Fix labels 
	* tab, gen() labels each dummy "<variable>==<category>". The loops below keep the
	* category part (the start position skips "<variable>==") and put a short prefix in front.
	label var icd10_group "ICD10 group category"
	foreach var of varlist age_child_y0_cat_bin* {
		local lab_var: variable label `var'
		local new_lab = "Child " + substr("`lab_var'", 19, 15)
		label var `var' "`new_lab'"
	}
	foreach var of varlist age_mother_y0_cat_bin* {
		local lab_var: variable label `var'
		local new_lab = "Mother " + substr("`lab_var'", 20, 5)
		label var `var' "`new_lab'"
	}
	label var age_mother_y0_cat_bin_1 "Mother below 30"
	foreach var of varlist age_father_y0_cat_bin* {
		local lab_var: variable label `var'
		local new_lab = "Father " + substr("`lab_var'", 20, 5)
		label var `var' "`new_lab'"
	}
	label var age_father_y0_cat_bin_1 "Father below 30"
	foreach var of varlist educ0_bin* {
		local lab_var: variable label `var'
		local new_lab = "Mother educ " + substr("`lab_var'", 12, 2)
		label var `var' "`new_lab'"
	}
	foreach var of varlist father_educ0_bin* {
		local lab_var: variable label `var'
		local new_lab = "Father educ " + substr("`lab_var'", 19, 2)
		label var `var' "`new_lab'"
	}
	foreach var of varlist pre_avg_income_qrt_bin* {
		local lab_var: variable label `var'
		local new_lab = "Mother " + substr("`lab_var'", 21, 15)
		label var `var' "`new_lab'"
	}
	foreach var of varlist pre_avg_father_income_qrt_bin* {
		local lab_var: variable label `var'
		local new_lab = "Father " + substr("`lab_var'", 28, 15)
		label var `var' "`new_lab'"
	}
	foreach var of varlist earnings_gap_hh_qrt_bin* {
		local lab_var: variable label `var'
		local new_lab = "HH " + substr("`lab_var'", 22, 17)
		label var `var' "`new_lab'"
	}
	foreach var of varlist icd10_group_bin* {
		local lab_var: variable label `var'
		* take the whole category name: a fixed length of 12 characters cut off
		* the longer names ("Muscoloskeletal", "Genitourinary")
		local new_lab = "ICD10 " + substr("`lab_var'", 14, .)
		label var `var' "`new_lab'"
	}
	label var finnish "Finnish mother"
	label var single "Single mother"
	label var married "Married mother"
	* Education labels from FOLK metadata
	* NOTE(review): assumes educ0 and father_educ0 each take exactly six distinct values,
	* in ascending order matching the levels below - confirm against the data
	label var educ0_bin_1 "Upper secondary education"
	label var educ0_bin_2 "Post-secondary non-tertiary education"
	label var educ0_bin_3 "Short-cycle tertiary education"
	label var educ0_bin_4 "Bachelor's or equivalent level"
	label var educ0_bin_5 "Master's or equivalent level"
	label var educ0_bin_6 "Doctoral or equivalent level"

	label var father_educ0_bin_1 "Upper secondary education"
	label var father_educ0_bin_2 "Post-secondary non-tertiary education"
	label var father_educ0_bin_3 "Short-cycle tertiary education"
	label var father_educ0_bin_4 "Bachelor's or equivalent level"
	label var father_educ0_bin_5 "Master's or equivalent level"
	label var father_educ0_bin_6 "Doctoral or equivalent level"

	label var siblings "Siblings"


	**************************
	* 2.1 ANALYSIS BY MEDIAN *
	**************************

	* Explanatory note
	global note_median "Below median represents the numerically smaller (closer to 0) predicted effect category, while above median effects are numerically larger."
	
	*-----------------------------------------*
	* Below vs above median (BM v AM) dummies *
	*-----------------------------------------*

	* Full table with summary stats + diff. + p-value (Below vs Above median)
	preserve

	* Estimates
	eststo clear
	* Summary columns 1-2 (stored as est1 = below median, est2 = above median)
	bys pred_eff_median: eststo: estpost summarize $vars_bin
	* Diff. + p-value columns 3-4 (stored as est3)
	eststo: estpost ttest $vars_bin, by(pred_eff_median)

	* The note is passed in quotes so that it is added as one line of text;
	* unquoted, every word would be treated as a separate note line
	esttab est1 est2 est3 using "${output_CATE_selected}\tab A`ntab'_fi.tex", replace ///
	cells("mean(pattern(1 1 0) fmt(%9.3f)) b(pattern(0 0 1) fmt(%9.3f)) p(pattern(0 0 1) fmt(%9.3f))") ///
	mlabels("Below Median" "Above Median" " ") ///
	collabels("Mean" "Diff." "p-value") ///
	nonumbers nodepvar label addnote("$note_median")

	restore
	}
}
}

* End of file